$title  demand.GMS

* ###################################

* Per capita income, consumption patterns, and CO2 emissions

* Journal of the Association of Environmental and Resource Economists

* Justin Caron and Thibault Fally

* April 2021

*###################################


* ESTIMATE DEMAND SYSTEM PARAMETERS


$set SLASH \
$if %system.filesys% == UNIX $set SLASH /


$if not set datadir $set datadir "data%SLASH%"
$setglobal datadir %datadir%


* ---------------------------------------------------------------------------------------------
* OPTIONS :

* CHOSE SPECIFICATION
* choices : tc,  theta4, tc_noenergy, theta8, homonotc 
* for f-stat on energy goods, get sse from "tc_noenergy"
$if not set spec $set spec tc

* subset of regions to include
$if not set regsubset $set regsubset rall

* MINIMIZE ERRORS ON LOGS OR CONSUMPTION SHARES ?
* choices : log, consshare, logweighted
$ if not set objective $set objective logweighted
*$ if not set objective $set objective consshare

* load gravity data from stata or gams ?
* choices : stata, gams
$ if not set gravitydata $set gravitydata gams

* skip reporting ?
* choices : yes, no
$ if not set skipreporting $set skipreporting yes


* BOOSTRAP ?
$if not set boot $set boot no
* set nb of boostrap iterations:
$if not set itr $set itr 4


* SELECT DATASET TO USE - DONT FORGET TO CHECK WHERE TO LOAD GRAVITY ESTIMATES FROM

* with gas and gdt aggregated
$if not set ds $set ds gtap8gas

$include loaddata_gtap8.gms


* ------------------------------------
* IMPORT  GRAVITY ESTIMATES

parameter       coeffs stores all sector-specific coefficients
                phiest
                tcostest
                logphiest
                IM
                EX
                cst;

* with IV
*$gdxin estimates\gravityestimates_%ds%_internationaltrade.gdx
$gdxin estimates%SLASH%gravityestimates_%ds%.gdx
$load  coeffs phiest im ex cst tcostest

parameter esttheta;
esttheta(i) = 0;

display coeffs;


* --  DEFINE SECTORS WHICH ARE MOSTLY INTERMEDIATES

parameter sharefd % FD in vdm (dom output);
set intermediates(i);

sharefd(i) = (sum((r,g), vdfm(i,g,r) + vifm(i,g,r)) - sum((j,r), vdfm(i,j,r) + vifm(i,j,r)))/ sum((r,g), vdfm(i,g,r) + vifm(i,g,r));

* defined as "intermediate" if  less than 10 % of production goes to final demand
intermediates(i)$(sharefd(i)<0.1) = yes;
display sharefd, intermediates;

* -- DEFINE SERVICE AND TRADABLE SECTORS SECTORS

*set     serv(i) service sectors / CMN, DWE,ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP, GDT, ELY/
set     serv(i) service sectors /osg/
        tradables(i) the tradable sectors;

tradables(i) = yes;
tradables(serv) = no;
display tradables;


* -----------------------------------------------------------
* NLLS Demand estimations
* -----------------------------------------------------------
* the only exogenous parameters needed are: x(i,r), per capita expenditure, w(n) = wage = PCI


parameter       x(i,r)          per capita expenditure
                w(r)            wage = PCI
                indexp(i)       industry total expenditure,
                logphi,
                phi
                bhat;

set i_(i), r_(r),g_(g),
        rall(r) set of 94 regions
        rICP(r) set of regions with ICP prices;
i_(i) = no;



* -- SELECT WHICH PHI TO USE HERE
*$if "%gravitydata%" == "stata" logphi(i,r) = importdata(i,r,"log_Phi_v2");
*$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);


$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);



* -- SELECT WHICH SECTORS TO USE HERE :

* using all sectors for which we have estimates of PHI :
i_(i)$sum(r,phi(i,r))= yes;

* selecting regions :
rall(r)$sum(i,phi(i,r)) = yes;



* use this parameter in excel to get this set of regions :
set rskm(r) regions with skill ratio ensowments within the 2 middle quartiles
   /MYS, VNM, GEO,KAZ, EGY, ALB, PAK, NGA, SEN, THA, PHL, TUR, ZWE, LTU, COL, LVA, SVK, KOR, TUN, ARG, BOL, HRV, HUN, VEN, MLT, RUS, CZE, SVN, CHL, UKR, MUS, CRI, PAN, IRN, EST, POL,
  NZL, CYP, URY, BRA, ZAF, SGP, CAN, BWA, BGR/ ;
*
* other sets :

set red(r) / ALB, ARM, AZE, BGD, BGR, BLR, BOL, CHN, COL, ECU, EGY, GEO, GTM, IDN, IND, KAZ, KGZ, LKA, MAR, MYS, NGA, NIC, PAK, PER, PHL, PRY, ROU, SEN, THA, TUR, UKR, VNM, ZMB, ZWE /;

set blue(r) / ARG, BRA, BWA, CHL, CRI, CYP, CZE, EST, HRV, HUN, IRN, KOR, LTU, LVA, MEX, MLT, MUS, NZL, PAN, POL, RUS, SVK, SVN, TUN, URY, VEN, ZAF /;

set green(r) / AUS, AUT, BEL, CAN, CHE, DEU, ESP, FIN, FRA, GBR, GRC, HKG, IRL, ITA, JPN, NLD, PRT, SGP, SWE, TWN /;



set nored(r) / ARG, AUS, AUT, BEL, BRA, BWA, CAN, CHE, CHL, CRI, CYP, CZE, DEU, DNK, ESP, EST, ETH, FIN, FRA, GBR,
GRC, HKG, HRV, HUN, IRL, IRN, ITA, JPN, KHM, KOR, LAO, LTU, LUX, LVA, MDG, MEX, MLT,  MOZ, MUS, MWI, NLD, NOR,
NZL, PAN, POL, PRT, RUS, SGP, SVK, SVN, SWE, TUN, TWN, TZA, UGA, URY, USA, VEN, ZAF /;


set noblue(r) / ALB, ARM, AUS, AUT, AZE, BEL, BGD, BGR, BLR, BOL, CAN, CHE, CHN, COL, DEU, DNK, ECU, EGY, ESP, ETH,
FIN, FRA, GBR, GEO, GRC, GTM, HKG, IDN, IND, IRL, ITA, JPN, KAZ, KGZ, KHM, LAO, LKA, LUX, MAR, MDG,  MOZ, MWI,
MYS, NGA, NIC, NLD, NOR, PAK, PER, PHL, PRT, PRY, ROU, SEN, SGP, SWE, THA, TUR, TWN, TZA, UGA, UKR, USA, VNM, ZMB, ZWE /;


set nogreen(r) / ALB, ARG, ARM, AZE, BGD, BGR, BLR, BOL, BRA, BWA, CHL, CHN, COL, CRI, CYP, CZE, DNK, ECU, EGY, EST, ETH,
GEO, GTM, HRV, HUN, IDN, IND, IRN, KAZ, KGZ, KHM, KOR, LAO, LKA, LTU, LUX, LVA, MAR, MDG, MEX, MLT,  MOZ, MUS, MWI,
MYS, NGA, NIC, NOR, NZL, PAK, PAN, PER, PHL, POL, PRY, ROU, RUS, SEN, SVK, SVN, THA, TUN, TUR, TZA, UGA, UKR, URY, USA,
VEN, VNM, ZAF, ZMB, ZWE /;



* trick to have r_ defined as "base" set - need that for bootstrapping, cant use ORD on an unstable set
execute_unload 'setr.gdx', rall, ricp, red, blue, green, rskm;

$gdxin setr

* IF USING RED BLUE GREEN SETS, USE THIS PART OF THE CODE :
$if "%regsubset%"=="red" $load r_=red
$if "%regsubset%"=="blue" $load r_=blue
$if "%regsubset%"=="green" $load r_=green
$if "%regsubset%"=="rall" $load r_=rall
$if "%regsubset%"=="riq" $load r_=riq
$if "%regsubset%"=="rskm" $load r_=rskm


display r_;



* dropping intermediates
i_(intermediates) = no;

i_("coa") = yes;
i_("gas") = yes;


* DROPPING DWELLINGS :
i_("dwe") = no;


display i,i_, intermediates;
alias(i_,j_);
alias(r_,s_);

* define new g set
g_(i_) = yes; g_("c") = yes; g_("g") = yes; g_("i") = yes;


parameter nbr, nbi;
nbr = card(r_);
nbi = card(i_);

display nbr, nbi;
display r_, i_;


* definine per-capita expenditure only on selected sectors:
w(r)$pop(r) = 10e8* sum(i_,fd(i_,r)) /pop(r);
x(i,r)$pop(r)  = 10e8 * fd(i,r) / pop(r);
indexp(i)  =  sum(r_,fd(i,r_));


**********
* logging the weightes
* indexp(i)  = log(sum(r_,fd(i,r_)));

* take maximum expenditure share
* HELPS FOR VERY SMALL SECTORS SUCH AS COAL
indexp(i)  = smax(r_, x(i,r_)/w(r_));

display indexp;

logphi(i,r)$phi(i,r) =log(phi(i,r));


parameter expshare, sectdrop;
expshare(i_,r_) = fd(i_,r_) / sum(i_.local,fd(i_,r_));

sectdrop(i_,r_) = 1;

* make it clean and drop observations for all sectors:
sectdrop(i_,r_)$(expshare(i_,r_) < 0.00005) =0 ;

* for paper drop in reporting only, not estimation.. (matters for R2)
sectdrop(i_,r_) = 1;
sectdrop(i_,r_)$(NOT fd(i_,r_)) =0 ;

* compute some statistics
parameter       fittedPCexp     fitted per capita expenditure
                fittedexp       fitted expenditure
*                fittedexpshare  fitted expenditure share
                sstot total sum of squares
                rsquared
                nobs number of observations
                Fstat F-test statistic
                nbp number of parameters in model
                df degrees of freedom for Fstat
                sigma2hat estimated variance of regression
		modelselection;

* ----------------------------------------------
* for bootstrapping :

set boot /1*%itr%/;
option seed=081567;

parameter  wt(r)        weight in the objective
           bootcoef(boot,*,*)
           dim          number of ctries
           rdraw        random draw from the pool of ctries
           cardzz(r)    index on each observation
           wtchk        weight check
;
wt(r_)=1;

alias (r_,k,kk,z,zz);
*cardzz(k,kk)=(ord(k)-1)*card(k) + ord(kk);
* PROBLEM CANNOT USE ORD ON AN "UNSTABLE SET", see correction "trick" above
cardzz(k) = ord(k);
dim=card(k);
display dim, cardzz;

* which country-sector pairs are missing
set missingx(i,r);
missingx(i_,r_) = yes;
missingx(i_,r_)$x(i_,r_) = no;

display missingx;


* ---------------------------------------------------------
* -- DEFINE MODEL :

variables sse_;

positive variables      sigma_scale_,  sigma_(i), lambda_(r), theta_, delta_,
 fe_(i) the fixed effect
 mu_(i);

equations  obj_log, obj_consshare, obj_logweighted, budget, commontheta, icppriceindex, sigma_fact, commondelta, est_theta;

* minimizing LOG error terms
obj_log.. sse_ =e=      sum((i_,r_)$x(i_,r_), wt(r_) * sqr(log(x(i_,r_)) - (log(fe_(i_)) - sigma_(i_)*log(lambda_(r_))
                + mu_(i_)*logphi(i_,r_)  )));

* minimizing consumption share error terms
obj_consshare.. sse_ =e= sum((i_,r_)$x(i_,r_), wt(r_) * sqr(x(i_,r_)/w(r_) - fe_(i_) * (lambda_(r_)**(-sigma_(i_)))
                         * (phi(i_,r_)**mu_(i_))  /w(r_) ));

* minimizing LOG error terms weighted by total expenditure by sector
obj_logweighted.. sse_ =e=      sum((i_,r_)$x(i_,r_), indexp(i_) 
   * sectdrop(i_,r_)
		* wt(r_) * sqr(log(x(i_,r_)) - (log(fe_(i_)) - sigma_(i_)*log(lambda_(r_))
                + mu_(i_)*logphi(i_,r_)  )));


budget(r_) ..   sum(i_, (lambda_(r_)**(-sigma_(i_))) * fe_(i_) *(phi(i_,r_)**mu_(i_))) =e= w(r_);

* for specification 3, impose a common theta across sectors :
commontheta(i_) ..      mu_(i_)  =e= ((sigma_(i_) * sigma_scale_) -1) / theta_ ;

* for specification 4, impose a common delta across sectors :
commondelta(i_) ..      mu_(i_) =e= ((sigma_(i_) * sigma_scale_) -1) * delta_ / bhat(i_) ;

* for specification 5, use estimated thetas :
est_theta(i_) ..      mu_(i_) =e= ((sigma_(i_) * sigma_scale_) -1) / esttheta(i_) ;


* initialize variables:
sse_.L = 0;
sigma_.L(i_) = 1;
lambda_.L(r_)$w(r_) = w("usa")/w(r_);
fe_.L(i_) = w("usa") * 1/card(i_);
mu_.L(i_) = 0;
theta_.L = 1;
sigma_scale_.L = 10;
mu_.UP(i_) =  1e10;
sigma_scale_.LO = 1E-7;
sigma_.LO(i_) = 1E-7;
lambda_.LO(r_)= 1E-7;
fe_.LO(i_) = 1E-7;
theta_.LO = 1E-7;

* fix one sigma_ to one (textiles: one that is not a service industry) :
sigma_.FX("tex") =1;
lambda_.FX("USA") =10;

* should be secondary energy only
set ene(i) / coa, gas, ely, p_c /;

parameter forstata;

forstata("fd",i_,r_) = x(i_,r_);
forstata("expenditure share",i_,r_) = x(i_,r_) / sum(i_.local, x(i_,r_));
forstata("fd","all ene",r_) = sum(ene, x(ene,r_));
forstata("logpcfd",i_,r_)$x(i_,r_) = log(x(i_,r_));
forstata("logpcfd","all ene",r_) = log(forstata("fd","all ene",r_));

* testing sensitivity of inclusion of investment and gov. expenditure
forstata("logfd - c",i_,r_)$fd_disagg("c",i_,r_) = log(10e8 * fd_disagg("c",i_,r_) / pop(r_));
forstata("logfd - g",i_,r_)$fd_disagg("g",i_,r_) = log(10e8 * fd_disagg("g",i_,r_) / pop(r_));
forstata("logfd - i",i_,r_)$fd_disagg("i",i_,r_) = log(10e8 * fd_disagg("i",i_,r_) / pop(r_));
forstata("logfd - cg",i_,r_)$fd_disagg("c",i_,r_) = log(10e8 * (fd_disagg("c",i_,r_) + fd_disagg("g",i_,r_)) / pop(r_));

forstata("logtotalfd - c",i_,r_) = log(10e8 * sum(i_.local,fd_disagg("c",i_,r_)) / pop(r_));
forstata("logtotalfd - g",i_,r_) = log(10e8 * sum(i_.local,fd_disagg("g",i_,r_)) / pop(r_));
forstata("logtotalfd - i",i_,r_) = log(10e8 * sum(i_.local,fd_disagg("i",i_,r_)) / pop(r_));
forstata("logtotalfd - cg",i_,r_) = log(10e8 * (sum(i_.local,fd_disagg("c",i_,r_) + fd_disagg("g",i_,r_))) / pop(r_));

forstata("GAMS weights",i_,r_) = indexp(i_);
forstata("logPHI",i_,r_) = logphi(i_,r_);
forstata("logpci",i_,r_) = log(w(r_));
forstata("logpci","all ene",r_) = log(w(r_));

* this is total expenditure for the country (for potential weighting)
forstata("total exp",i_,r_) = expenditure(r_);
forstata("total exp","all ene",r_) = expenditure(r_);
forstata("population",i_,r_) =  pop(r_);


parameter  lambda, mu,  sse, fe, theta, sigma_scale, eta, avgmu;
theta=0;
*sigma_scale=0;
parameter specificationstats for reporting;

* -- define specification :

$if "%spec%"=="tc"  model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 3*card(i_);
$if "%spec%"=="tc_noenergy" sigma_.FX(ene)=1;  model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 3*card(i_);


* for one-step estimation:

$if "%spec%"=="notc" mu_.FX(i_) = 0;  model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 2*card(i_);

$if "%spec%"=="commontheta"  model nlls /obj_%objective%, budget, commontheta/; nbp("non-homoth")= card(r_) + 2*card(i_) + 1;

$if "%spec%"=="theta4"  model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 4; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="theta5"  model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 5; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="theta6" model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 6; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="theta8"  model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 8; nbp("non-homoth")= card(r_) + 2*card(i_) ;


$if "%spec%"=="homonotc" mu_.FX(i_) = 0; sigma_.FX(i_) = 1;  model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 1*card(i_) ;

$if "%spec%"=="homotc"  sigma_.FX(i_) = 1; model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="homotheta4"  sigma_.FX(i_) = 1; model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 4; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="esttheta"   model nlls /obj_%objective%, budget, est_theta/; nbp("non-homoth")= card(r_) + 2*card(i_) ;


* -- Solve model :

* make a savepoint for boostraping
nlls.savepoint=1;
solve nlls using nlp minimizing sse_;
nlls.savepoint=0;

$if "%spec%"=="tc_noenergy" display sse_.l; 
$if "%spec%"=="tc_noenergy" $exit

* calculate statistics
coeffs("sigma","coeff", i_) = sigma_.L(i_);
coeffs("mu","coeff", i_) = mu_.L(i_);
coeffs("GTAP ARMINGTON","coeff", i_) = esubd(i_);
coeffs("estimated theta","coeff", i_) = esttheta(i_);
coeffs("indexp","coeff", i_) = indexp(i_) ;


nobs = card(i_) * card(r_);

* redefining income classifications
set lowincome(r), lowincome2;
set middleincome(r), middleincome2;
set highincome(r);

$ontext
* using the world bank guidelines for analytical classification in 2007 (GNI/capita):
 https://datahelpdesk.worldbank.org/knowledgebase/articles/378833-how-are-the-income-group-thresholds-determined


Low income
Lower middle income
Upper middle income
High income

<= 935
936-3,705
3,706-11,455
> 11,455
$offtext

* lowincome
lowincome(r)$(pcexp(r) < 935) = 1;

* lower middle income
lowincome2(r)$(pcexp(r) > 935 and pcexp(r) < 3705) = 1;
	     
* upper middle income
middleincome2(r)$(pcexp(r) > 3705 and pcexp(r) < 11455) = 1;

* aggregating middle income
middleincome(r)$(pcexp(r) > 935 and pcexp(r) < 11455) = 1;

highincome(r)$(pcexp(r) > 11455) = 1;

* count number of countries in each group:
parameter country_counter;

country_counter("low") = sum(lowincome(r_), 1);
country_counter("lowermid") = sum(lowincome2(r_), 1);
country_counter("uppermid") = sum(middleincome2(r_), 1);
country_counter("higher") = sum(highincome(r_), 1);

display country_counter;


set	se(i)	secondary energy        /coa,ely,gas, p_c/  ;
set	nse(i)  non-energy goods;

nse(i_) = yes;
nse(se) = no;


* calculate the average elasticity of expenditures w.r.t phi :
* weighted by sectors size
avgmu("weighted") = sum(i_, indexp(i_) * mu_.L(i_)) / sum(i_, indexp(i_));
avgmu("weighted - energy goods") = sum(se, indexp(se) * mu_.L(se)) / sum(se, indexp(se));
avgmu("unweighted") = sum(i_,   mu_.L(i_)) / card(i_);

* calculate weighted Rsquare for logweighted specification
parameter weightedsstot, weightedrsquared;

weightedrsquared = 0;
weightedsstot = 0;


sse("non-homoth") = sse_.L;
sstot = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp(i_) *  sqr(log(x(i_,r_)) - sum((i_.local,r_.local)$x(i_,r_), log(x(i_,r_)))/nobs  ));

modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;

* Schwarz = BIC

* in paper use this measure
modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") /nobs ;

modelselection("bic weighted2","non-homoth")$weightedsstot = nobs * log(sse_.L /weightedsstot) + log(nobs)* nbp("non-homoth");


* compute backed out theta:
* the scale of sigma matters and is not identified.
* ASSUMPTION: the smallest sigma =1
* this yields a correlation of 0.01
* the correlation goes to -0.23 as the scale of sigmas goes to infinity
coeffs("backed out theta","coeff",i_)$coeffs("mu","coeff", i_) = 
(coeffs("sigma","coeff", i_)/ smin(i_.local$((coeffs("sigma","coeff", i_) >0.05) and coeffs("mu","coeff", i_)),coeffs("sigma","coeff", i_))-1)/ coeffs("mu","coeff", i_);


parameter withinrsquare;

withinrsquare("to avg","all") = 1 - 
((sum((i_,r_)$x(i_,r_), indexp(i_) 
		* wt(r_) * sqr(log(x(i_,r_)) - (log(fe_.L(i_)) - sigma_.L(i_)*log(lambda_.L(r_))
                + mu_.L(i_)*logphi(i_,r_)  ))))

/ sum((i_,r_)$x(i_,r_), indexp(i_) * sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  )));


sigma2hat = sse_.L /(nobs -nbp("non-homoth")) ;
fe("non-homoth",i_) = fe_.L(i_) ;
lambda("non-homoth", r_) = lambda_.L(r_) ;
sigma_scale("non-homoth") = sigma_scale_.L ;
theta = theta_.L ;

modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;

modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") / nobs;


* AGGREGATE TO SMALLER SET OF SECTORS FOR REPORTING

SET  ii_	 Sectoral classifications /

MAN	Manufactured and Processed Goods
ENE
SRV
TRN
AGR

/;


$setglobal source implan

$eolcom !

* no EIS

SET	mapi(i,*)	Mapping from GTAP sectors to EPPA-10 sectors /
ATP.TRN,
B_T.AGR,
C_B.AGR,
CMN.MAN,
CMT.AGR,
CNS.MAN,
COA.ENE,
CRP.MAN,
OIL.ENE,
CTL.AGR,
DWE.SRV,
ELE.MAN,
ELY.ENE,
FMP.MAN,
FRS.AGR,
FSH.AGR,
GAS.ENE,
PDR.AGR,
WHT.AGR,
I_S.MAN,
ISR.SRV,
LEA.MAN,
LUM.MAN,
 MIL.AGR,
MVH.MAN,
NFM.MAN,
NMM.MAN,
OAP.AGR,
OBS.SRV,
OCR.AGR,
OFD.AGR,
OFI.SRV,
OME.MAN,
OMF.MAN,
OMN.MAN,
OMT.AGR,
OSD.AGR,
OSG.SRV,
OTN.MAN,
OTP.TRN,
P_C.ENE,
PCR.AGR,
PFB.AGR,
PPP.MAN,
ROS.SRV,
SGR.AGR,
TEX.MAN,
TRD.SRV,
V_F.AGR,
VOL.AGR,
WAP.MAN,
WTP.TRN,
WTR.SRV
/;



* calculate fitted expenditure shares :
fittedPCexp("non-homoth",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));

* homothetic, computed as partial effect
* use average sigma  -- choice doesn't matter too much if using shares only
parameter avg_sigma;

avg_sigma =    sum((r_,i_.local), sigma_.L(i_) * fd(i_,r_)) / (sum((i_.local,r_), fd(i_,r_) ));
        
display avg_sigma;

fittedPCexp("homoth partial",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- avg_sigma) * (phi(i_,r_)**(mu_.L(i_)));
fittedPCexp("non-homoth",i_,r_)$(not sectdrop(i_,r_)) = 0; 
fittedexp("non-homoth",i_,r_) = fittedPCexp("non-homoth",i_,r_) / 10e8* pop(r_)  ;
fittedexp("homoth partial",i_,r_) = fittedPCexp("homoth partial",i_,r_) / 10e8* pop(r_)  ;
fittedPCexp("Pci",i_,r_) = pcexp(r_);
fittedPCexp("expenditure",i_,r_) = expenditure(r_);
forstata("fitted nh",i_,r_)$x(i_,r_) = fittedPCexp("non-homoth",i_,r_);
forstata("fitted nh","all ene",r_) = sum(ene, fittedPCexp("non-homoth",ene,r_));

* computing expenditure shares
coeffs("expenditure share - lowincome","coeff", i_) = sum(lowincome, fd(i_,lowincome)) /sum((i_.local,lowincome), fd(i_,lowincome));
coeffs("expenditure share - middleincome","coeff", i_) =  sum(middleincome,fd(i_,middleincome)) /sum((i_.local,middleincome), fd(i_,middleincome));

coeffs("expenditure share - lowincome2","coeff", i_) = sum(lowincome2, fd(i_,lowincome2)) /sum((i_.local,lowincome2), fd(i_,lowincome2));
coeffs("expenditure share - middleincome2","coeff", i_) =  sum(middleincome2,fd(i_,middleincome2)) /sum((i_.local,middleincome2), fd(i_,middleincome2));

coeffs("expenditure share - highincome","coeff", i_) =  sum(highincome,fd(i_,highincome)) /sum((i_.local,highincome), fd(i_,highincome));
coeffs("expenditure share","coeff", i_) =  sum(r_,fd(i_,r_)) /sum((r_.local,i_.local), fd(i_,r_));

coeffs("expenditure share","coeff", "all ene") =  sum(r_,sum(ene,fd(ene,r_))) /sum((r_.local,i_.local), fd(i_,r_));


* if using only consumption:
coeffs("expenditure share - consumption only","coeff", i_) =  sum(r_, fd_disagg("c",i_,r_)) /sum((r_.local,i_.local),  fd_disagg("c",i_,r_));

coeffs("expenditure share - consumption only","coeff",  "all ene") =  sum(r_, sum(ene,fd_disagg("c",ene,r_))) /sum((r_.local,i_.local),  fd_disagg("c",i_,r_));

coeffs("expenditure share - lowincome","coeff", ii_) = sum((i_,lowincome)$mapi(i_,ii_), fd(i_,lowincome)) /sum((i_.local,lowincome), fd(i_,lowincome));
coeffs("expenditure share - middleincome","coeff", ii_) =  sum((i_,middleincome)$mapi(i_,ii_),fd(i_,middleincome)) /sum((i_.local,middleincome), fd(i_,middleincome));

coeffs("expenditure share - lowincome2","coeff", ii_) = sum((i_,lowincome2)$mapi(i_,ii_), fd(i_,lowincome2)) /sum((i_.local,lowincome2), fd(i_,lowincome2));
coeffs("expenditure share - middleincome2","coeff", ii_) =  sum((i_,middleincome2)$mapi(i_,ii_),fd(i_,middleincome2)) /sum((i_.local,middleincome2), fd(i_,middleincome2));


coeffs("expenditure share - highincome","coeff", ii_) =  sum((i_,highincome)$mapi(i_,ii_),fd(i_,highincome)) /sum((i_.local,highincome), fd(i_,highincome));
coeffs("expenditure share","coeff", ii_) =  sum((i_,r_)$mapi(i_,ii_),fd(i_,r_)) /sum((r_.local,i_.local), fd(i_,r_));


* calculate income elasticities
* using global consumption shares here (doesnt matter for correlations)
coeffs("incelast - mean shares","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(r,fd(i_,r)) /sum((i_.local,r), fd(i_,r))));
parameter incelast, avgincelast;
IncElast("direct",i_,r_) = sigma_.L(i_)*(sum(i_.local,  fd(i_,r_))) /
                                               (sum(i_.local,  sigma_.L(i_) * fd(i_,r_) ));


IncElast("direct fitted dem",i_,r_) = sigma_.L(i_)*(sum(i_.local, forstata("fitted nh",i_,r_))) /
                                               (sum(i_.local,  sigma_.L(i_) * forstata("fitted nh",i_,r_)));

* weighted by final demand
avgincelast("direct","energy goods","all") = sum((r_,se), fd(se,r_) * IncElast("direct",se,r_)) /
	sum((r_,se), fd(se,r_) ) ;


* splitting the countries into low- middle and high income

* low income = to Thailand = 3000
* middle income = to Czech Republic = 15000


coeffs("incelast - mean shares - lowincome","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(lowincome,fd(i_,lowincome)) /sum((i_.local,lowincome), fd(i_,lowincome))));
coeffs("incelast - mean shares - middleincome","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(middleincome,fd(i_,middleincome)) /sum((i_.local,middleincome), fd(i_,middleincome))));
coeffs("incelast - mean shares - highincome","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(highincome,fd(i_,highincome)) /sum((i_.local,highincome), fd(i_,highincome))));

avgincelast("direct","energy goods","lowincome") = sum((lowincome,se), fd(se,lowincome) * IncElast("direct",se,lowincome)) /
	sum((lowincome,se), fd(se,lowincome) ) ;


avgincelast("direct","energy goods","middleincome") = sum((middleincome,se), fd(se,middleincome) * IncElast("direct",se,middleincome)) /
	sum((middleincome,se), fd(se,middleincome) ) ;


avgincelast("direct","energy goods","highincome") = sum((highincome,se), fd(se,highincome) * IncElast("direct",se,highincome)) /
	sum((highincome,se), fd(se,highincome) ) ;


* splitting the countries into low- middle and high income

* low income = to Thailand = 3000
* middle income = to Czech Republic = 15000

* aggregate using FD shares as weights:

coeffs("incelast - mean shares","coeff", ii_) = sum(i_$mapi(i_,ii_), coeffs("incelast - mean shares","coeff", i_) * sum(r_, fd(i_,r_))) /
     sum(i_$mapi(i_,ii_),  sum(r_, fd(i_,r_)));



coeffs("incelast - mean shares - lowincome","coeff", ii_) = sum(i_$mapi(i_,ii_), coeffs("incelast - mean shares - lowincome","coeff", i_) * sum(lowincome, fd(i_,lowincome))) /
     sum(i_$mapi(i_,ii_),  sum(lowincome, fd(i_,lowincome)));

coeffs("incelast - mean shares - middleincome","coeff", ii_) = sum(i_$mapi(i_,ii_), coeffs("incelast - mean shares - middleincome","coeff", i_) * sum(middleincome, fd(i_,middleincome))) /
     sum(i_$mapi(i_,ii_),  sum(middleincome, fd(i_,middleincome)));
coeffs("incelast - mean shares - highincome","coeff", ii_) = sum(i_$mapi(i_,ii_), coeffs("incelast - mean shares - highincome","coeff", i_) * sum(highincome, fd(i_,highincome))) /
     sum(i_$mapi(i_,ii_),  sum(highincome, fd(i_,highincome)));

forstata("Incel",i_,r_) = IncElast("direct",i_,r_);


* using median country fitted consumption shares here
* median country = BGR  3496.850823
coeffs("incelast - median ctry","coeff", i_) = sigma_.L(i_)*(sum(i_.local,  fittedPCexp("non-homoth",i_,"BGR"))) /
                                               (sum(i_.local,  sigma_.L(i_) * fittedPCexp("non-homoth",i_,"BGR") ));

* weighted
rsquared("nh","all","total") = 1- sse_.L /sstot;

rsquared("nh","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp(SE) *  sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp(SE) *  sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;


* to identify role of income, replace income by median income
* keep alpha sigma phis constant

* median income: CRI	5183.095599	MEDIAN

* USA	43894.8786

* CHN	2212.110749

* AT MEDIAN INCOME COUNTRY

parameter counterfact_pcincome;

counterfact_pcincome(r_) = 5183;

positive variables      approx_lambda_(r) ;

equations approx_lambda_eq;

approx_lambda_eq(r_) ..   sum(i_, (approx_lambda_(r_)**(-sigma_(i_))) * fe_(i_) *(phi(i_,r_)**mu_(i_))) =e= counterfact_pcincome(r_);


approx_lambda_.L(r_) = lambda_.L(r_);
* fix: 
sigma_.FX(i_) = sigma_.L(i_);
mu_.FX(i_) = mu_.L(i_);
fe_.FX(i_) = fe_.L(i_);


model approx_lambda_MCP / approx_lambda_eq.approx_lambda_ /;

approx_lambda_MCP.iterlim = 100000;

solve approx_lambda_MCP using MCP ;


* calculate fitted expenditure shares :
fittedPCexp("at median income",i_,r_) = fe_.L(i_)* approx_lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedexp("at median income",i_,r_) = fittedPCexp("at median income",i_,r_) / 10e8* pop(r_) ;


counterfact_pcincome(r_) = 2212;

approx_lambda_.L(r_) = lambda_.L(r_);
* fix: 
sigma_.FX(i_) = sigma_.L(i_);
mu_.FX(i_) = mu_.L(i_);
fe_.FX(i_) = fe_.L(i_);


solve approx_lambda_MCP using MCP ;


* calculate fitted expenditure shares :
fittedPCexp("at china income",i_,r_) = fe_.L(i_)* approx_lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedexp("at china income",i_,r_) = fittedPCexp("at china income",i_,r_) / 10e8* pop(r_) ;

* 

counterfact_pcincome(r_) = 43894;

approx_lambda_.L(r_) = lambda_.L(r_);
* fix: 
sigma_.FX(i_) = sigma_.L(i_);
mu_.FX(i_) = mu_.L(i_);
fe_.FX(i_) = fe_.L(i_);



solve approx_lambda_MCP using MCP ;

* calculate fitted expenditure shares :
fittedPCexp("at usa income",i_,r_) = fe_.L(i_)* approx_lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedexp("at usa income",i_,r_) = fittedPCexp("at usa income",i_,r_) / 10e8* pop(r_) ;



* AT MEAN PRICES:

counterfact_pcincome(r_) = w(r_);
parameter phi_backup(i,r);

phi_backup(i_,r_) = phi(i_,r_);

phi(i_,r_) = sum(r_.local, phi(i_,r_) * sum(i_.local, fd(i_,r_)))/ sum((i_.local,r_.local),  fd(i_,r_));

display phi;


approx_lambda_.L(r_) = lambda_.L(r_);
* fix: 
sigma_.FX(i_) = sigma_.L(i_);
mu_.FX(i_) = mu_.L(i_);
fe_.FX(i_) = fe_.L(i_);


solve approx_lambda_MCP using MCP ;

* calculate fitted expenditure shares :
fittedPCexp("at mean phi",i_,r_) = fe_.L(i_)* approx_lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedexp("at mean phi",i_,r_) = fittedPCexp("at mean phi",i_,r_) / 10e8* pop(r_) ;

phi(i_,r_) = phi_backup(i_,r_);

* ----------

* "PARTIAL HOMOTHETIC", USING MU FROM NH VERSION
* using this in paper

* unfix:

fe_.LO(i_) = 1E-7;
fe_.up(i_) = 1E8;


* re-ininitialize variables
sse_.L = 0;
lambda_.L(r_)$w(r_) = w("usa")/w(r_);
fe_.L(i_) = w("usa") * 1/card(i_);

*re-solve with sigmas fixed at one
sigma_.FX(i_) = 1;
theta_.UP = 1E10;

$if "%spec%"=="theta4"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 4;  nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="theta5"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 5; nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="theta6"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 6; nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="theta8"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 8; nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="tc"  model nllshomoth /obj_%objective%, budget/;  nbp("homoth") = card(r_) +  card(i_) *2;



solve nllshomoth using nlp minimizing sse_;
* re-set to non-homoth for bootstrap
sigma_.UP(i_) = 10e7;
sigma_.LO(i_) = 1E-7;



*sigma2hat = sse_.L /(nobs -nbp("non-homoth")) ;
fe("homoth partial",i_) = fe_.L(i_) ;
lambda("homoth partial", r_) = lambda_.L(r_) ;
sigma_scale("homoth partial") = sigma_scale_.L ;

rsquared("h partial","all","total") = 1- sse_.L /sstot;

sse("homoth partial") = sse_.L;


* calculate fitted expenditure shares :
fittedPCexp("homoth partial",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));

fittedPCexp("homoth partial",i_,r_)$(not sectdrop(i_,r_)) = 0;

fittedexp("homoth partial",i_,r_) = fittedPCexp("homoth partial",i_,r_) / 10e8* pop(r_) ;


forstata("fitted h partial",i_,r_)$x(i_,r_) = fittedPCexp("homoth partial",i_,r_);
forstata("fitted h partial","all ene",r_) = sum(ene, fittedPCexp("homoth partial",ene,r_));


withinrsquare("to homoth partial","all")$sse("homoth partial") = 1 - (sse("non-homoth") /
sse("homoth partial")
);


* -- HOMOTHETIC VERSION

* unfix
mu_.UP(i_) =  1e10;
mu_.LO(i_) =  1e-10;


* re-ininitialize variables
sse_.L = 0;
lambda_.L(r_)$w(r_) = w("usa")/w(r_);
fe_.L(i_) = w("usa") * 1/card(i_);
mu_.L(i_) = 0;


*re-solve with sigmas fixed at one
sigma_.FX(i_) = 1;
theta_.UP = 1E10;


solve nllshomoth using nlp minimizing sse_;
* re-set to non-homoth for bootstrap
sigma_.UP(i_) = 10e7;
sigma_.LO(i_) = 1E-7;


*sigma2hat = sse_.L /(nobs -nbp("non-homoth")) ;
fe("homoth",i_) = fe_.L(i_) ;
lambda("homoth", r_) = lambda_.L(r_) ;
sigma_scale("homoth") = sigma_scale_.L ;
*theta = theta_.L ;

rsquared("h","all","total") = 1- sse_.L /sstot;

sse("homoth") = sse_.L;

* calculate fitted expenditure shares :
fittedPCexp("homoth",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedPCexp("homoth",i_,r_)$(not sectdrop(i_,r_)) = 0;
fittedexp("homoth",i_,r_) = fittedPCexp("homoth",i_,r_) / 10e8* pop(r_) ;
fittedexp("true",i_, r_) = fd(i_, r_) ;

forstata("fitted h",i_,r_)$x(i_,r_) = fittedPCexp("homoth",i_,r_);
forstata("fitted h","all ene",r_) = sum(ene, fittedPCexp("homoth",ene,r_));

withinrsquare("to homoth","all") = 1 - (sse("non-homoth") /
sse("homoth")
);


* compare to Homo no tc
* run homonotc and get the Rsquare

withinrsquare("to homoth notc","all") = 1 - (sse("non-homoth") /
964);

withinrsquare("to homoth notc","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp(SE) * sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 /  sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
;

* instead, do Rsquare relative to homoth partial (see above)
withinrsquare("to homoth","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp(SE) * sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 /  sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
;


* --  F-tests for parameter restrictions :

* 1 - test restriction of mu = 0
* import from notc case manually


$if "%objective%"=="log"        sse("no TC imp") = 14003.3885458859  ;
$if "%objective%"=="consshare"  sse("no TC imp") = 1.51084988080653   ;
$if "%objective%"=="logweighted"  sse("no TC imp") = 1794672.8200  ;

nbp("no TC imp") = card(r_) + 2 *card(i_) ;

fstat("mu=0")$(nbp("non-homoth") - nbp("no TC imp"))  = ((sse("no TC imp") - sse("non-homoth"))
         / (nbp("non-homoth") - nbp("no TC imp")))
         / (sse("non-homoth")
         / ( nobs - nbp("non-homoth"))) ;

* this F stat has an F distribution, with (p2 - p1, n - p2) degrees of freedom
df("1") = (nbp("non-homoth") - nbp("no TC imp"));
df("2") = nobs - nbp("non-homoth");

* so we need to compare to F(56,5002) .. at 5% significance level, that is about 1.35
* (according to http://www.ma.utexas.edu/users/davis/375/popecol/tables/f005.html)
* we get a value of 8.79 > 1.35

* 2 - test restriction of sigma = 1

fstat("sigma=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* for energy goods only: compute model with sigma = 1 for energy, get SSE and paste it here:
* run tc_noenergy specification
fstat("sigma=1 energy") = ((655.466   - sse("non-homoth")) / (4)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;
* so we need to compare to F(4,5002) .. at 5% significance level, that is about 1.9
* according to http://www.socscistatistics.com/pvalues/fdistribution.aspx  highly signif <0.00001


* 3 - test restriction of mu = (sigma-1) / theta
* if high, mean mu is significantly different from (sigma-1) / theta in data

* import SSE of case with no-restriction (tc)
$if "%objective%"=="logweighted"  sse("TC imp") = 1515414.70042461;
nbp("TC imp") = card(r_) + 3 *card(i_) ;

fstat("commontheta")$(nbp("TC imp") - nbp("non-homoth")) = ((sse("non-homoth") - sse("TC imp")) / (nbp("TC imp") - nbp("non-homoth"))) / (sse("TC imp") / ( nobs - nbp("TC imp"))) ;
specificationstats("avg incel energy goods") = avgincelast("direct","energy goods","all"); 
specificationstats("avg incel energy goods - low income ctries") = avgincelast("direct","energy goods","lowincome") ;

specificationstats("avgmu - weighted") = avgmu("weighted");
specificationstats("avgmu - weighted - energy goods") = avgmu("weighted - energy goods");

*specificationstats("F-stat mu=0") = fstat("mu=0") ;
specificationstats("F-stat sigma=1") = fstat("sigma=1") ;
specificationstats("F-stat sigma=1 energy") = fstat("sigma=1 energy");

*specificationstats("F-stat commontheta") = fstat("commontheta");

specificationstats("F-stat flex nh") = 100000000000000000000;


specificationstats("R2") = rsquared("nh","all","total");
specificationstats("R2 energy goods") = rsquared("nh","all","energy goods");

specificationstats("within R2") = withinrsquare("to homoth","all");
specificationstats("within R2 energy goods") = withinrsquare("to homoth","energy goods");


specificationstats("aic weighted") = modelselection("aic weighted","non-homoth");
specificationstats("bic weighted") = modelselection("bic weighted","non-homoth");

specificationstats("p") = nbp("non-homoth");
specificationstats("n") = nobs;



specificationstats("df1") = df("1");
specificationstats("df2") = df("2");
specificationstats("sse unweighted") = sse("non-homoth");
*specificationstats("sse homoth") = sse("homoth");
specificationstats("sstot") = sstot;
specificationstats("avgmu - unweighted") = avgmu("unweighted") ;

specificationstats("bic unweighted") = modelselection("bic unweighted","non-homoth");
specificationstats("within Rsquare") = withinrsquare("to homoth","all");
specificationstats("within Rsquare (energy goods)") = withinrsquare("to homoth","energy goods");

specificationstats("theta") = theta;

display specificationstats;


execute_unload 'estimates\estimates_%ds%_%objective%_%spec%_%regsubset%.gdx';




* exit here if no bootstrapping
$if %boot%==no $exit


$label bootstrap
* ---------------------------------------------------------------------------
* BOOTSTRAPPING
* ---------------------------------------------------------------------------

parameter nbbootiterations /%itr%/;
set params / sigma, mu, eta, theta "incelast - mean shares", "incelast - median ctry" /;
parameter bootcorr, bootsharecorr;

option solprint=off;
option limrow=0;
option limcol=0;

bootcoef(boot,"sigma",i_) = 0; bootcoef(boot,"eta",i_) = 0;bootcoef(boot,"mu",i_) = 0;

* -- RE-ESTIMATE LAMBDA FOR MEDIAN COUNTRY
* define model to re-estimate the lambda for BGR (median country) for income elasticity calculation
parameter fittedPCexpBGR(i), saveparams just to save the parameter estimates;
saveparams("sigma",i_) = 0;
positive variables lambdaBGR_; variable sseBGR_; equations obj_fitlambdaBGR;
* minimizing error in budget constraint for BGR


obj_fitlambdaBGR.. sseBGR_ =e= sum(i_, sqr(sum(i_.local, (lambdaBGR_**(-saveparams("sigma",i_))) * saveparams("fe",i_) *(phi(i_,"bgr")**saveparams("mu",i_))
        *(icpprice(i_,"bgr")**saveparams("eta",i_))) - w("bgr")));

model fitBGR /obj_fitlambdaBGR/;

parameter reportlambda;

loop(boot,
wt(r_)=0;
        loop(k,
        rdraw=round(uniform(0.5,dim+0.5))
                loop((z)$(cardZZ(z) eq rdraw),
                 wt(z)=wt(z)+1;
*  note : countries can be reselected
                );
        );

display wt;

wtchk=round(sum((k),wt(k))-dim,5);
display wtchk;
abort$(wtchk ne 0) "Inconsistent Weighting";

*execute_loadpoint "nlls_p.gdx";
solve nlls using nlp minimizing sse_;


bootcoef(boot,"sigma",i_)=sigma_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"mu",i_)=mu_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"eta",i_)=eta_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"fe",i_)=fe_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"theta",i_)=theta_.l$(nlls.solvestat eq 1);


bootcoef(boot,"incelast - mean shares",i_) =
        sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(r,fd(i_,r)) /sum((i_.local,r), fd(i_,r))));


* when using medium country, need to re-estimate its lambda in case it is out of the bootstrap sample
saveparams("eta",i_) =eta_.L(i_); saveparams("mu",i_) =mu_.L(i_); saveparams("fe",i_) = fe_.L(i_); saveparams("sigma",i_) =sigma_.L(i_);
lambdaBGR_.L = lambda_.L("bgr");
solve fitBGR using nlp minimizing sseBGR_;
fittedPCexpBGR(i_) = fe_.L(i_)* lambdaBGR_.L**(- sigma_.L(i_)) * (phi(i_,"BGR")**(mu_.L(i_)))* (ICPprice(i_,"BGR")**eta_.L(i_));
bootcoef(boot, "incelast - median ctry", i_) = sigma_.L(i_)*(sum(i_.local,  fittedPCexpBGR(i_))) / (sum(i_.local,  sigma_.L(i_) * fittedPCexpBGR(i_) ));
reportlambda(boot) = lambdaBGR_.L;



bootcoef(boot,"incelast - median ctry", i_) = sigma_.L(i_)*(sum(i_.local,  fd(i_,"BGR"))) / (sum(i_.local,  sigma_.L(i_) * fd(i_,"BGR") ));


bootcoef(boot,"factorcoeff","all")=factorcoeff_.l$(nlls.solvestat eq 1);

* do factor correlations here :
y(i_) = sigma_.L(i_);
*YY(f,i_) = avgfactorratio(f,i_,"direct","all");

bootcorr(boot,f,"noweight", ftype,avgtype)  =    (card(i_)*sum((i_), yy(f,i_,ftype,avgtype)*y(i_)) -sum((i_), yy(f,i_,ftype,avgtype))*sum((i_), y(i_))) /
        (
        (card(i_)*sum((i_), yy(f,i_,ftype,avgtype)**2) - (sum((i_), yy(f,i_,ftype,avgtype))**2))**(0.5)*
        (card(i_)*sum((i_), y(i_)**2) - (sum((i_), y(i_))**2))**(0.5));

bootcorr(boot,f,"expshare", ftype,avgtype)  = (sum((i_), weight(i_)* yy(f,i_,ftype,avgtype)*y(i_))
  -sum((i_), weight(i_)* yy(f,i_,ftype,avgtype))*sum((i_), weight(i_)* y(i_))) /
        (
        (sum((i_), weight(i_)* yy(f,i_,ftype,avgtype)**2) - (sum((i_), weight(i_)* yy(f,i_,ftype,avgtype))**2))**(0.5)*
        (sum((i_), weight(i_)* y(i_)**2) - (sum((i_), weight(i_)* y(i_))**2))**(0.5));



* do share correlations here :
* re calculate fitted expenditure shares :
fittedPCexp("non-homoth",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * phi(i_,r_)**(mu_.L(i_));
shares("exp fitted",i_,r_) = fittedPCexp("non-homoth",i_,r_) / sum((i_.local), fittedPCexp("non-homoth",i_,r_));
shares("exp fitted",i_,"world") = sum(r_, fittedPCexp("non-homoth",i_,r_)) / sum((i_.local,r_), fittedPCexp("non-homoth",i_,r_));
relshares("exp fitted",i_,r_) = shares("exp fitted",i_,r_) / shares("exp fitted",i_,"world");


loop(sharetype, loop(sharetype2,

XX(i_,r_) = relshares(sharetype,i_,r_);
XXX(i_,r_) = relshares(sharetype2,i_,r_) ;


bootsharecorr(boot,sharetype,sharetype2)$(sum((i_,r_),xx(i_,r_)) and sum((i_,r_),xxx(i_,r_))) = (card(i_)*card(r_)*sum((i_,r_), XX(i_,r_)*XXX(i_,r_)) -sum((i_,r_), XX(i_,r_))*sum((i_,r_), XXX(i_,r_))) /
        (
        sqrt(abs(card(i_)*card(r_)*sum((i_,r_), XX(i_,r_)*XX(i_,r_)) - (sum((i_,r_), XX(i_,r_))*sum((i_,r_), XX(i_,r_)))))*
        sqrt(abs(card(i_)*card(r_)*sum((i_,r_), XXX(i_,r_)*XXX(i_,r_)) - (sum((i_,r_), XXX(i_,r_))*sum((i_,r_), XXX(i_,r_))))));

););
* end share correlation
*- ---------------------------------
);

display bootcoef, bootsharecorr;

*       Rank the estimates
alias(boot,boot2);
parameter rank(boot,*,*)        rank order of estimate,
        rankcorr, ranksharecorr;

*rank(boot,"sigma",i_)=1+sum(boot2$(bootcoef(boot2,"sigma",i_)
*                                 gt bootcoef(boot,"sigma",i_)),1);

rank(boot,params,i_)=1+sum(boot2$(bootcoef(boot2,params,i_)
                                 gt bootcoef(boot,params,i_)),1);


rank(boot,"factorcoeff","all")=1+sum(boot2$(bootcoef(boot2,"factorcoeff","all")
                                 gt bootcoef(boot,"factorcoeff","all")),1);

rankcorr(boot,f,weighttype,ftype,avgtype)=1+sum(boot2$(bootcorr(boot2,f,weighttype,ftype,avgtype)
                                 gt bootcorr(boot,f,weighttype,ftype,avgtype)),1);

ranksharecorr(boot,sharetype,sharetype2)=1+sum(boot2$(bootsharecorr(boot2,sharetype,sharetype2)
                                 gt bootsharecorr(boot,sharetype,sharetype2)),1);

* note : several estimates can have the same rank here ..
* bug in their code? can lead to double counting later

display rank, ranksharecorr;


* correction:
parameter nbsame, nbsamecorr, nbsamesharecorr nb of elements with same rank;
nbsame(params,boot,i_) =0;
nbsamecorr(boot,f,weighttype,ftype,avgtype) =0;
nbsamesharecorr(boot,sharetype,sharetype2) =0;

loop(i_,
loop(params,
loop(boot,
        loop(boot2,
*nbsame(boot,i_)$(rank(boot,"sigma",i_) eq rank(boot2,"sigma",i_)) = nbsame(boot,i_) +1;
*rank(boot,"sigma",i_)$(nbsame(boot,i_) gt 1) = rank(boot,"sigma",i_) + 1;

nbsame(params,boot,i_)$(rank(boot,params,i_) eq rank(boot2,params,i_)) = nbsame(params,boot,i_) +1;
rank(boot,params,i_)$(nbsame(params,boot,i_) gt 1) = rank(boot,params,i_) + 1;


););););

loop(f,
loop(ftype,
loop(avgtype, loop(weighttype,
loop(boot,
        loop(boot2,
nbsamecorr(boot,f,weighttype,ftype,avgtype)$(rankcorr(boot,f,weighttype,ftype,avgtype) eq rankcorr(boot2,f,weighttype,ftype,avgtype)) = nbsamecorr(boot,f,weighttype,ftype,avgtype) +1;
rankcorr(boot,f,weighttype,ftype,avgtype)$(nbsamecorr(boot,f,weighttype,ftype,avgtype) gt 1) = rankcorr(boot,f,weighttype,ftype,avgtype) + 1;

););););););

loop(sharetype,
loop(sharetype2,
loop(boot,
        loop(boot2,
nbsamesharecorr(boot,sharetype,sharetype2)$(ranksharecorr(boot,sharetype,sharetype2) eq ranksharecorr(boot2,sharetype,sharetype2)) = nbsamesharecorr(boot,sharetype,sharetype2) +1;
ranksharecorr(boot,sharetype,sharetype2)$(nbsamesharecorr(boot,sharetype,sharetype2) gt 1) = ranksharecorr(boot,sharetype,sharetype2) + 1;

););););

display nbsamesharecorr, ranksharecorr;

execute_loadpoint "nlls_p.gdx";
Parameter
        median(*,*)     "median bootstrap value"
        mediancorr
        mediansharecorr
        bias(*,*)       "bias in bootstrap median minus first moment"

        st_err(*,*)     "bootstrap standard error (average upper and lower)"
        st_errcorr
        st_errsharecorr
        crit95up(*,*)   "upper critical value for the 95 conf. interval"
        crit95lo(*,*)   "lower critical value for the 95 conf. interval"
;

*median("sigma",i_)=
*sum(boot$(rank(boot,"sigma",i_) eq
*                              round(0.5*(card(boot2)))),
*                                bootcoef(boot,"sigma",i_))

median(params,i_)=
sum(boot$(rank(boot,params,i_) eq
                              round(0.5*(card(boot2)))),
                                bootcoef(boot,params,i_))

* added this to correct for the fact that sometimes several estimates have the same rank
*/
*sum(boot$(rank(boot,"sigma",i_) eq round(0.5*(card(boot2)))),1)

;

mediancorr(f,weighttype,ftype,avgtype)=
sum(boot$(rankcorr(boot,f,weighttype,ftype,avgtype) eq
                              round(0.5*(card(boot2)))),
                                bootcorr(boot,f,weighttype,ftype,avgtype));

mediansharecorr(sharetype,sharetype2)=
sum(boot$(ranksharecorr(boot,sharetype,sharetype2) eq
                              round(0.5*(card(boot2)))),
                                bootsharecorr(boot,sharetype,sharetype2));


display mediansharecorr;

bias("sigma",i_)=median("sigma",i_)-sigma_.l(i_);
bias("eta",i_)=median("eta",i_)-eta_.l(i_);
bias("mu",i_)=median("mu",i_)-mu_.l(i_);


st_err(params,i_)=ABS(0.5*     (
                                sum(boot$(rank(boot,params,i_) eq
                                        round(0.8413*card(boot2))),
                                bootcoef(boot,params,i_))-
                                sum(boot$(rank(boot,params,i_) eq
                                        round(0.1587*card(boot2))),
                                bootcoef(boot,params,i_))
                                ));



st_err("factorcoeff","all")=ABS(0.5*     (
                                sum(boot$(rank(boot,"factorcoeff","all") eq
                                        round(0.8413*card(boot2))),
                                bootcoef(boot,"factorcoeff","all"))-
                                sum(boot$(rank(boot,"factorcoeff","all") eq
                                        round(0.1587*card(boot2))),
                                bootcoef(boot,"factorcoeff","all"))
                                ));

st_errcorr(f,weighttype,ftype,avgtype)=ABS(0.5*     (
                                sum(boot$(rankcorr(boot,f,weighttype,ftype,avgtype) eq
                                        round(0.8413*card(boot2))),
                                bootcorr(boot,f,weighttype,ftype,avgtype))-
                                sum(boot$(rankcorr(boot,f,weighttype,ftype,avgtype) eq
                                         round(0.1587*card(boot2))),
                                bootcorr(boot,f,weighttype,ftype,avgtype))
                                ));

st_errsharecorr(sharetype,sharetype2)=ABS(0.5*     (
                                sum(boot$(ranksharecorr(boot,sharetype,sharetype2) eq
                                        round(0.8413*card(boot2))),
                                bootsharecorr(boot,sharetype,sharetype2))-
                               sum(boot$(ranksharecorr(boot,sharetype,sharetype2) eq
                                        round(0.1587*card(boot2))),
                                bootsharecorr(boot,sharetype,sharetype2))
                                ));



crit95lo(params,i_)=sum(boot$(rank(boot,params,i_) eq
                                        round(0.975*(card(boot2)))),
                                bootcoef(boot,params,i_));

crit95up(params,i_)=sum(boot$(rank(boot,params,i_) eq
                                        round(0.025*(card(boot2)))),
                                bootcoef(boot,params,i_));

display median,  st_err,crit95lo, crit95up, bias;

coeffs(params, "st_err", i_) = st_err(params,i_);
coeffs(params,"crit95lo", i_) = crit95lo(params,i_);
coeffs(params,"crit95up", i_) = crit95up(params,i_);
coeffs(params, "bias", i_) = bias(params,i_);
coeffs(params, "median coeff", i_) = median(params,i_);


* significance :

coeffs("sigma","t-stat", i_)$coeffs("sigma", "st_err", i_) = (coeffs("sigma", "coeff", i_) - 1) / coeffs("sigma", "st_err", i_) ;
coeffs("eta","t-stat", i_)$coeffs("eta", "st_err", i_) = (coeffs("eta", "coeff", i_)-0) / coeffs("eta", "st_err", i_) ;
coeffs("mu","t-stat", i_)$coeffs("mu", "st_err", i_) = (coeffs("mu", "coeff", i_)-0) / coeffs("mu", "st_err", i_) ;

coeffs("sigma","sig", i_)$(abs(coeffs("sigma","t-stat", i_)) >1.96) = 1;
coeffs("eta","sig", i_)$(abs(coeffs("eta","t-stat", i_)) >1.96) = 1;
coeffs("mu","sig", i_)$(abs(coeffs("mu","t-stat", i_)) >1.96) = 1;



correlations(f,"std err",weighttype,ftype,avgtype) = st_errcorr(f,weighttype,ftype,avgtype);
correlations(f,"median coeff",weighttype,ftype,avgtype) = mediancorr(f,weighttype,ftype,avgtype);

sharecorrelations("direct", "std err",sharetype,sharetype2) = st_errsharecorr(sharetype,sharetype2);
sharecorrelations("direct", "median",sharetype,sharetype2) = mediansharecorr(sharetype,sharetype2);


display reportlambda;

execute_unload 'results\CRIE_%objective%_%spec%.gdx', coeffs, lambda, specificationstats
theta,  fe , sigma_scale, st_err, nbbootiterations, shares,relshares, sharecorrelations, correlations,
nbr, nbi, bootcorr, bootcoef,fittedPCexp, relshares, avgfactorratio, importshare,  tradepatterns,
fct, factorcontent, avgfactorcontent, nettrade, trade,estdata, indexp
;

